**********************************************************************************************
8/20/09:  ADDED 2006 & 2007 DATA.  KEY CHANGE IS NOW DISTINGUISH BETWEEN CARS AND VANS
	      FOR AUTOS WITH MAKE ONLY (JUST_MAKE=1).  THE 2006 & 2007 DATA IS ALL MAKE ONLY.
		  SEARCH: X-NEW CHANGE-X IN CODE BELOW
12/18/09:  ADDED 2008 DATA.  

11/3/10:  ADDED 2009 DATA. 

10/20/11:  ADDED 2009 DATA.  

10/30/15:  ADDED 2014 DATA.  

9/5/17:  ADDED 2015 & 2016 DATA.  

9/5/17:  ADDED 2017 DATA.  

**********************************************************************************************;

******************MASTER DATA.SAS**************************************************************;

filename car8081 'D:\Documents\CEXdata\Cars\cars_data\ovb.y80';
filename car84 'D:\Documents\CEXdata\Cars\cars_data\da8671.ovb.y8485';
filename car85 'D:\Documents\CEXdata\Cars\cars_data\da8904.ovb.y8586';
filename car86 'D:\Documents\CEXdata\Cars\cars_data\da9113.ovb.y8687';
filename car87 'D:\Documents\CEXdata\Cars\cars_data\da9332.ovb.y8788';
filename car88 'D:\Documents\CEXdata\Cars\cars_data\da9842.ovb.y8889_data_lrec';
filename car89 'D:\Documents\CEXdata\Cars\cars_data\da9841.ovb.y8990_data_lrec';
filename car90 'D:\Documents\CEXdata\Cars\cars_data\da9817.ovb.y9091_data_lrec';
filename car91 'D:\Documents\CEXdata\Cars\cars_data\da6262.ovb.y9192';
filename car92 'D:\Documents\CEXdata\Cars\cars_data\da6440.ovb.y9293_data_lrec';
filename car93 'D:\Documents\CEXdata\Cars\cars_data\da6543.ovb.y93';
filename car94 'D:\Documents\CEXdata\Cars\cars_data\da6710.ovb.y94';
filename car95 'D:\Documents\CEXdata\Cars\cars_data\da2264.ovb.y95';
filename car96 'D:\Documents\CEXdata\Cars\cars_data\da2794.ovb';
filename car97 'D:\Documents\CEXdata\Cars\cars_data\da2838.ovb';
filename car98 'D:\Documents\CEXdata\Cars\cars_data\da2971.ovb';
filename car99 'D:\Documents\CEXdata\Cars\cars_data\da3228.ovb';
filename car00 'D:\Documents\CEXdata\Cars\cars_data\da3396.ovb';
filename car01 'D:\Documents\CEXdata\Cars\cars_data\03674-0047-Data.ovb.txt';
filename car02 'D:\Documents\CEXdata\Cars\cars_data\03949-0046-data.ovb.txt';
filename car03 'D:\Documents\CEXdata\Cars\cars_data\ovb03.txt';
filename car04 'D:\Documents\CEXdata\Cars\cars_data\ovb04.txt';
filename car05 'D:\Documents\CEXdata\Cars\cars_data\ovb05.txt';
filename car06 'D:\Documents\CEXdata\Cars\cars_data\ovb06.txt';
filename car07 'D:\Documents\CEXdata\Cars\cars_data\ovb07.txt';
filename car08 'D:\Documents\CEXdata\Cars\cars_data\ovb08.txt';
filename car09 'D:\Documents\CEXdata\Cars\cars_data\ovb09.txt';
filename car10 'D:\Documents\CEXdata\Cars\cars_data\ovb10.txt';


options obs=max;

*****SECTION I: Read in data and Merge**********************************;

*************************1980 and 1981************************;
*qtradex variable not being read correctly;
data auto8081;
infile car8081 lrecl=350;
input qyear 1-3 newid 4-11 vehicib 19-20 vehicyb 22-24 vehicyr 26-27 mkmdly 29-31 cylq 33-34 autotran 36
pwrsteer 38 pwrbrake 40 aircar 42 diesel 48 vpursrce 59 vehnewu 57 vehgftc 61 vehpurmo 70-71
vehpuryr 73-74 tradex 82-87 netpurx 89-94 qtradex 265-274;

newid=9000000+newid;
qyear=19000+qyear;
vehpuryr=1900+vehpuryr;
IF qyear<19811 THEN year=1980;
ELSE IF qyear<19821 THEN year=1981;
ELSE year=1982;


******************1984**********************;
data auto84;
infile car84 lrecl=350;
input qyear 1-3 newid 4-11 vehicib 19-20 vehicyb 22-24 vehicyr  26-27 mkmdly 29-31 cylq 33-34 autotran 36
pwrsteer 38 pwrbrake 40 aircar 42 diesel 48 vpursrce 59 vehnewu 57 vehgftc 61 vehpurmo 70-71 
vehpuryr 73-74 tradex 82-87  netpurx 89-94 qtradex 268-277; 

year=1984;
newid=9000000+newid;
qyear=19000+qyear; 
vehpuryr=1900+vehpuryr; 
IF qyear NE 19851; 
RUN;

******************1985**********************;
data auto85;
infile car85 lrecl=350;
input qyear 1-3 newid 4-11 vehicib 19-20 vehicyb 22-24 vehicyr  26-27 mkmdly 29-31 cylq 33-34 autotran 36 
pwrsteer 38 pwrbrake 40 aircar 42 diesel 48 vpursrce 59 vehnewu 57 vehgftc 61 vehpurmo 70-71
vehpuryr 73-74 tradex 82-87  netpurx 89-94 qtradex 268-277; 

year=1985;
qyear=19000+qyear; 
vehpuryr=1900+vehpuryr; 
RUN;

PROC SORT;
	BY newid;
RUN;

*****************1986****************;
data auto86;
infile car86 lrecl=350;
input qyear 1-3 newid 4-11 vehicib 19-20 vehicyb 22-24 vehicyr  26-27 mkmdly 29-31 cylq 33-34 autotran 36 
pwrsteer 38 pwrbrake 40 aircar 42 diesel 48 vpursrce 59 vehnewu 57 vehgftc 61 vehpurmo 70-71 
vehpuryr 73-74 tradex 82-87  netpurx 89-94 qtradex 268-277; 

year=1986;
qyear=19000+qyear;
vehpuryr=1900+vehpuryr;
IF qyear NE 19871;
RUN;
PROC SORT;
	BY newid;
RUN;

*******eliminate repeat observations in 19854 and 19861****;
DATA auto85; 
MERGE auto85(IN=in1) auto86(IN=in2);
BY newid; 
if in2 THEN dat2=1;
else dat2=0;
IF in1;
IF dat2=1 THEN DELETE;
RUN; 
DATA auto85;
SET auto85;
newid=9000000+newid;
RUN;

*****************1987****************;
data auto87;
infile car87 lrecl=350;
input qyear 1-3 newid 4-11 vehicib 19-20 vehicyb 22-24 vehicyr  26-27 mkmdly 29-31 cylq 33-34 autotran 36 
pwrsteer 38 pwrbrake 40 aircar 42 diesel 48 vpursrce 59 vehnewu 57 vehgftc 61 vehpurmo 70-71 
vehpuryr 73-74 tradex 82-87  netpurx 89-94 qtradex 268-277;

year=1987;
qyear=19000+qyear; 
vehpuryr=1900+vehpuryr; 
IF qyear NE 19881; 
RUN;

*****************1988****************;
*two codebooks for this year, if numbers aren't working, have wrong codebook;
*sunroof, turbochg, frwhldrv, numdoor, and autotype new variables in this year;
*"vehicib" variable gone until 1996;
DATA auto88;  
INFILE car88;
INPUT qyear 1-3 newid 4-11 vehicyb 19-21 vehicyr 23-24 mkmdly 26-28 cylq 30-31 autotran 33 pwrsteer 35
pwrbrake 37 aircar 39 sunroof 41 turbochg 43 diesel 45 frwhldrv 47 numdoor 49 autotype 51 
vehnewu 58 vpursrce 60 vehgftc 62 vehpurmo 71-72 vehpuryr 74-75 tradex 79-84 netpurx 86-91 
qtradex 243-250; 

year=1988;
qyear=19000+qyear; 
vehpuryr=1900+vehpuryr; 
IF qyear NE 19891; 
RUN;

*****************1989****************;
*two codebooks for this year, if numbers aren't working, have wrong codebook;
*no "vehmile" or "vehicib" variables for this year;
DATA auto89;  
INFILE car89 lrecl=350;
INPUT qyear 1-3 newid 4-11 vehicyb 19-21 vehicyr 23-24 mkmdly 26-28 cylq 30-31 autotran 33 pwrsteer 35
pwrbrake 37 aircar 39 sunroof 41 turbochg 43 diesel 45 frwhldrv 47 numdoor 49 autotype 51 
vehnewu 58 vpursrce 60 vehgftc 62 vehpurmo 71-72 vehpuryr 74-75
tradex 79-84 netpurx 86-91 qtradex 243-250; 

year=1989;
qyear=19000+qyear; 
vehpuryr=1900+vehpuryr; 
IF qyear NE 19901; 
RUN;

*****************1990****************;
*two codebooks for this year, if numbers aren't working, have wrong codebook;
*no "vehmile" or "vehicib" variables for this year;
DATA auto90;  
INFILE car90 lrecl=350;
INPUT qyear 1-3 newid 4-11 vehicyb 19-21 vehicyr 23-24 mkmdly 26-28 cylq 30-31 autotran 33 pwrsteer 35
pwrbrake 37 aircar 39 sunroof 41 turbochg 43 diesel 45 frwhldrv 47 numdoor 49 autotype 51 
vehnewu 58 vpursrce 60 vehgftc 62 vehpurmo 71-72 vehpuryr 74-75
tradex 79-84 netpurx 86-91 qtradex 243-250; 

year=1990;
qyear=19000+qyear; 
vehpuryr=1900+vehpuryr; 
IF qyear NE 19911; 
RUN;

*****************1991****************;
*two codebooks for this year, if numbers aren't working, have wrong codebook;
*no "vehicib" variables for this year;
*"vehmile" new variable in Q19912;
*"mkmdly" moved to new position and changed from 3 to 4 digit code;
DATA auto91;  
INFILE car91 lrecl=350;
INPUT qyear 1-3 newid 4-11 vehicyb 19-21 vehicyr 23-24 mkmdly 307-310 cylq 30-31 autotran 33 pwrsteer 35
pwrbrake 37 aircar 39 sunroof 41 turbochg 43 diesel 45 frwhldrv 47 numdoor 49 autotype 51 
vehnewu 58 vpursrce 60 vehgftc 62 vehpurmo 71-72 vehpuryr 74-75
tradex 79-84 netpurx 86-91 qtradex 243-250 vehmile 298-303; 

year=1991;
qyear=19000+qyear; 
vehpuryr=1900+vehpuryr; 
IF qyear NE 19921; 
RUN;

******************1992**********************;
data auto92;
infile car92 lrecl=295;
input qyear 1-3 newid 4-11 vehicyb 19-21 vehicyr  23-24 mkmdly 26-29 cylq 31-32 autotran 34 pwrsteer 36
pwrbrake 38 aircar 40 sunroof 42 turbochg 44 diesel 46 frwhldrv 48 numdoor 50 autotype 52 
vpursrce 61 vehnewu 59 vehgftc 63 vehpurmo 65-66 vehpuryr 68-69 tradex 73-78  netpurx 80-85
qtradex 237-244 vehmile 285-290;

year=1992;
qyear=19000+qyear;
vehpuryr=1900+vehpuryr;
IF qyear NE 19931;
RUN;

******************1993**********************;
data auto93;
infile car93 lrecl=295;
input qyear 1-3 newid 4-11 vehicyb 19-21 vehicyr  23-24 mkmdly 26-29 cylq 31-32 autotran 34 pwrsteer 36
pwrbrake 38 aircar 40 sunroof 42 turbochg 44 diesel 46 frwhldrv 48 numdoor 50 autotype 52 
vpursrce 61 vehnewu 59 vehgftc 63 vehpurmo 65-66 vehpuryr 68-69 tradex 73-78  netpurx 80-85 
qtradex 237-244 vehmile 285-290;

year=1993;
qyear=19000+qyear;
vehpuryr=1900+vehpuryr;
IF qyear NE 19941;
RUN;

******************1994**********************;
data auto94;
infile car94 lrecl=295;
input qyear 1-3 newid 4-11 vehicyb 19-21 vehicyr  23-24 mkmdly 26-29 cylq 31-32 autotran 34 pwrsteer 36 pwrbrake 38 
aircar 40 sunroof 42 turbochg 44 diesel 46 frwhldrv 48 numdoor 50 autotype 52 vpursrce 61 
vehnewu 59 vehgftc 63 vehpurmo 65-66 vehpuryr 68-69 tradex 73-78  netpurx 80-85 qtradex 237-244 
vehmile 285-290;

year=1994;
qyear=19000+qyear;
vehpuryr=1900+vehpuryr;
IF qyear NE 19951;
RUN;

******************1995**********************;
data auto95;
infile car95 lrecl=295;
input qyear 1-3 newid 4-11 vehicyb 19-21 vehicyr  23-24 mkmdly 26-29 cylq 31-32 autotran 34 pwrsteer 36 pwrbrake 38 
aircar 40 sunroof 42 turbochg 44 diesel 46 frwhldrv 48 numdoor 50 autotype 52 vpursrce 61 
vehnewu 59 vehgftc 63 vehpurmo 65-66 vehpuryr 68-69 tradex 73-78  netpurx 80-85 qtradex 237-244 

vehmile 285-290;
year=1995;
qyear=19000+qyear;
vehpuryr=1900+vehpuryr;
RUN;
PROC SORT;
	BY newid;
RUN;

******************1996**********************;
data auto96;
infile car96 lrecl=310;
input qyear 1-5 newid 6-13 vehicyb 21-23 vehicyr  25-26 mkmdly 28-31 cylq 33-34 autotran 36 pwrsteer 38 pwrbrake 40 
aircar 42 sunroof 44 turbochg 46 diesel 48 frwhldrv 50 numdoor 52 autotype 54 vpursrce 63 
vehnewu 61 vehgftc 65 vehpurmo 67-68 vehpuryr 70-73 tradex 77-82  netpurx 84-89 qtradex 243-250 
vehmile 291-296 vehicib 300-301;

year=1996;
IF qyear NE 19971;
RUN;
PROC SORT;
	BY newid;
RUN;

*******eliminate repeat observations in 19955 and 19961****;
DATA temp;
SET auto96;
BY newid;
IF first.newid;
RUN;
* NOTE:  ALSO RAN THIS WITH OUT CONDITIONING ON FIRST.NEWID AND GET SAME RESULT;
DATA auto95; 
MERGE auto95(IN=in1) temp(IN=in2);
BY newid; 
if in2 THEN dat2=1;
else dat2=0;
IF in1;
IF dat2=1 THEN DELETE;
RUN; 

******************1997*********************;
data auto97;
infile car97 lrecl=310;
input qyear 1-5 newid 6-13 vehicyb 21-23 vehicyr  25-26 mkmdly 28-31 cylq 33-34 autotran 36 pwrsteer 38 pwrbrake 40 
aircar 42 sunroof 44 turbochg 46 diesel 48 frwhldrv 50 numdoor 52 autotype 54 vpursrce 63 
vehnewu 61 vehgftc 65 vehpurmo 67-68 vehpuryr 70-73 tradex 77-82  netpurx 84-89 qtradex 243-250 
vehmile 291-296 vehicib 300-301;

year=1997;
IF qyear NE 19981;
RUN;

******************1998**********************;
data auto98;
infile car98 lrecl=310;
input qyear 1-5 newid 6-13 vehicyb 21-23 vehicyr  25-26 mkmdly 28-31 cylq 33-34 autotran 36 pwrsteer 38 pwrbrake 40 
aircar 42 sunroof 44 turbochg 46 diesel 48 frwhldrv 50 numdoor 52 autotype 54 vpursrce 63 
vehnewu 61 vehgftc 65 vehpurmo 67-68 vehpuryr 70-73 tradex 77-82  netpurx 84-89 qtradex 243-250 
vehmile 291-296 vehicib 300-301;

year=1998;
IF qyear NE 19991;
RUN;

*******************1999********************;
data auto99;
infile car99 lrecl=310;
input qyear 1-5 newid 6-13 vehicyb 21-23 vehicyr  25-26 mkmdly 28-31 cylq 33-34 autotran 36 pwrsteer 38 pwrbrake 40 
aircar 42 sunroof 44 turbochg 46 diesel 48 frwhldrv 50 numdoor 52 autotype 54 vpursrce 63 
vehnewu 61 vehgftc 65 vehpurmo 67-68 vehpuryr 70-73 tradex 77-82  netpurx 84-89 qtradex 243-250 
vehmile 291-296 vehicib 300-301;

year=1999;
IF qyear NE 20001;
RUN;

********************2000*****************;
data auto00;
infile car00 lrecl=310;
input qyear 1-5 newid 6-13 vehicyb 21-23 vehicyr  25-26 mkmdly 28-31 cylq 33-34 autotran 36 pwrsteer 38 pwrbrake 40 
aircar 42 sunroof 44 turbochg 46 diesel 48 frwhldrv 50 numdoor 52 autotype 54 vpursrce 63 
vehnewu 61 vehgftc 65 vehpurmo 67-68 vehpuryr 70-73 tradex 77-82  netpurx 84-89 qtradex 243-250 
vehmile 291-296 vehicib 300-301;

year=2000;
IF qyear NE 20011;
RUN;

********************2001*****************;
data auto01;
infile car01 lrecl=310;
input qyear 1-5 newid 6-13 vehicyb 21-23 vehicyr  25-26 mkmdly 28-31 cylq 33-34 autotran 36 pwrsteer 38 pwrbrake 40 
aircar 42 sunroof 44 turbochg 46 diesel 48 frwhldrv 50 numdoor 52 autotype 54 vpursrce 63 
vehnewu 61 vehgftc 65 vehpurmo 67-68 vehpuryr 70-73 tradex 77-82  netpurx 84-89 qtradex 243-250 
vehmile 291-296 vehicib 300-301;

year=2001;
IF qyear NE 20021;
RUN;

***********************2002*********************;
Data auto02;
infile car02 lrecl=350;
input qyear 1-5 newid 6-13 vehicyb 21-23 vehicyr  25-26 mkmdly 28-31 cylq 33-34 autotran 36 pwrsteer 38 pwrbrake 40 
aircar 42 sunroof 44 turbochg 46 diesel 48 frwhldrv 50 numdoor 52 autotype 54 vpursrce 63 
vehnewu 61 vehgftc 65 vehpurmo 67-68 vehpuryr 70-73 tradex 77-82  netpurx 84-89 qtradex 243-250 
vehmile 291-296 vehicib 300-301;

year=2002;
IF qyear NE 20031;
RUN;

*********************2003***********************;
*mkmdly replaced with mkmodel which is a 5 digit character variable 
*instead of a 4 digit numeric variable;
Data auto03;
infile car03 lrecl=350;
input qyear 1-5 newid 6-13 vehicyb 21-23 vehicyr  25-26 cylq 33-34 autotran 36 pwrsteer 38 pwrbrake 40 
aircar 42 sunroof 44 turbochg 46 diesel 48 frwhldrv 50 numdoor 52 autotype 54 vpursrce 63 
vehnewu 61 vehgftc 65 vehpurmo 67-68 vehpuryr 70-73 tradex 77-82  netpurx 84-89 qtradex 243-250 
vehmile 291-296 vehicib 300-301 mkmodel $ 303-307;

year=2003;
IF qyear NE 20041;
RUN;

********************2004**********************;
DATA auto04;
INFILE car04 lrecl=350;
INPUT qyear 1-5 newid 6-13 vehicyb 21-23 vehicyr  25-26 cylq 33-34 autotran 36 pwrsteer 38 pwrbrake 40 
aircar 42 sunroof 44 turbochg 46 diesel 48 frwhldrv 50 numdoor 52 autotype 54 vpursrce 63 
vehnewu 61 vehgftc 65 vehpurmo 67-68 vehpuryr 70-73 tradex 77-82  netpurx 84-89 qtradex 243-250 
vehmile 291-296 vehicib 300-301 mkmodel $ 303-307;   
year=2004;
RUN;
PROC SORT;
	BY newid;
RUN;
********************2005**********************;
DATA auto05;
INFILE car05 lrecl=350;
INPUT qyear 1-5 newid 6-13 vehicyb 21-23 vehicyr  25-26 cylq 33-34 autotran 36 pwrsteer 38 pwrbrake 40 
aircar 42 sunroof 44 turbochg 46 diesel 48 frwhldrv 50 numdoor 52 autotype 54 vpursrce 63 
vehnewu 61 vehgftc 65 vehpurmo 67-68 vehpuryr 70-73 tradex 77-82  netpurx 84-89 qtradex 243-250 
vehmile 291-296 vehicib 300-301 mkmodel $ 303-307 fueltype 314 vpurindv 316;   
year=2005;
RUN;
PROC SORT;
	BY newid;
RUN;
******* DELETE OBS IN 045 THAT ARE ALSO IN 051 ****;
DATA temp;
SET auto05;
BY newid;
IF first.newid;
RUN;
* NOTE:  ALSO RAN THIS WITH OUT CONDITIONING ON FIRST.NEWID AND GET SAME RESULT;
DATA auto04; 
MERGE auto04(IN=in1) temp(IN=in2);
BY newid; 
if in2 THEN dat2=1;
else dat2=0;
IF in1;
IF dat2=1 THEN DELETE;
RUN; 

********************2006**********************;
DATA auto06;
INFILE car06 lrecl=350;
INPUT qyear 1-5 newid 6-13 vehicyb 21-23 vehicyr  25-26 cylq 33-34 autotran 36 pwrsteer 38 pwrbrake 40 
aircar 42 sunroof 44 turbochg 46 diesel 48 frwhldrv 50 numdoor 52 autotype 54 vpursrce 63 
vehnewu 61 vehgftc 65 vehpurmo 67-68 vehpuryr 70-73 tradex 77-82  netpurx 84-89 qtradex 243-250 
vehmile 291-296 vehicib 300-301 mkmodel $ 303-307 fueltype 314 vpurindv 316;   

* NOTE:  KEEP 2005 QYR5 INSTEAD OF 2006 QTR1 BECAUSE THE FORMER HAS VEHICLE MODEL.  I HAVE VERIFIED THAT
         THESE TWO DATASETS ARE IDENTICAL EXCEPT FOR A HANDFUL OF OBS FOR NETPURX AND QTRADEX;
IF qyear NE 20061;
IF qyear NE 20071;
year=2006;
RUN;

********************2007**********************;
DATA auto07;
INFILE car07 lrecl=350;
INPUT qyear 1-5 newid 6-13 vehicyb 21-23 vehicyr  25-26 cylq 33-34 autotran 36 pwrsteer 38 pwrbrake 40 
aircar 42 sunroof 44 turbochg 46 diesel 48 frwhldrv 50 numdoor 52 autotype 54 vpursrce 63 
vehnewu 61 vehgftc 65 vehpurmo 67-68 vehpuryr 70-73 tradex 77-82  netpurx 84-89 qtradex 243-250 
vehmile 291-296 vehicib 300-301 mkmodel $ 303-307 fueltype 314 vpurindv 316;   

IF qyear NE 20081;
year=2007;
RUN;

********************2008**********************;
DATA auto08;
INFILE car08 lrecl=350;
INPUT qyear 1-5 newid 6-13 vehicyb 21-23 vehicyr  25-26 cylq 33-34 autotran 36 pwrsteer 38 pwrbrake 40 
aircar 42 sunroof 44 turbochg 46 diesel 48 frwhldrv 50 numdoor 52 autotype 54 vpursrce 63 
vehnewu 61 vehgftc 65 vehpurmo 67-68 vehpuryr 70-73 tradex 77-82  netpurx 84-89 qtradex 243-250 
vehmile 291-296 vehicib 300-301 mkmodel $ 303-307 fueltype 314 vpurindv 316;   

IF qyear NE 20091;
year=2008;
RUN;

********************2009**********************;
DATA auto09;
INFILE car09 lrecl=350;
INPUT qyear 1-5 newid 6-13 vehicyb 21-23 vehicyr  25-26 cylq 33-34 autotran 36 pwrsteer 38 pwrbrake 40 
aircar 42 sunroof 44 turbochg 46 diesel 48 frwhldrv 50 numdoor 52 autotype 54 vpursrce 63 
vehnewu 61 vehgftc 65 vehpurmo 67-68 vehpuryr 70-73 tradex 77-82  netpurx 84-89 qtradex 243-250 
vehmile 291-296 vehicib 300-301 mkmodel $ 303-307 fueltype 314 vpurindv 316;   

IF qyear NE 20101;
year=2009;
RUN;

********************2010**********************;
DATA auto10;
INFILE car10 lrecl=350;
INPUT qyear 1-5 newid 6-13 vehicyb 21-23 vehicyr  25-26 cylq 33-34 autotran 36 pwrsteer 38 pwrbrake 40 
aircar 42 sunroof 44 turbochg 46 diesel 48 frwhldrv 50 numdoor 52 autotype 54 vpursrce 63 
vehnewu 61 vehgftc 65 vehpurmo 67-68 vehpuryr 70-73 tradex 77-82  netpurx 84-89 qtradex 243-250 
vehmile 291-296 vehicib 300-301 mkmodel $ 303-307 fueltype 314 vpurindv 316;   

*NOTE, FOR SOME REASON, 1ST QTR 2011 OBS ARE CODED AS QYEAR=20115, THIS IS DIFF FROM ANY OTHER YEAR;
IF qyear=20115 THEN qyear=20111;
IF qyear NE 20111;

year=2010;
RUN;



* FOR SURVEY YEARS 2011- 2016;
%MACRO read(yr,yr4,dropqtr);

  PROC IMPORT DATAFILE="D:\Documents\CEXdata\Cars\cars_data\ovb&yr..csv"
     OUT=auto
     DBMS=CSV
     REPLACE;
     GETNAMES=YES;
  RUN;
 
  DATA auto&yr;
SET auto(RENAME=(qyear=temp1  
						vehicyb=temp2 
						vehicyr=temp3  
						vehnewu=temp4 
						vehgftc=temp5 
						vehpurmo=temp6
						vehpuryr=temp7 
						tradex=temp8 
						vehicib=temp9
						fueltype=temp10
						vpurindv=temp11
						make=mkmodel
						newid=temp12));



	qyear=temp1*1;  
	vehicyb=temp2*1; 
	vehicyr=temp3*1;  
	vehnewu=temp4*1; 
	vehgftc=temp5*1; 
	vehpurmo=temp6*1;
	vehpuryr=temp7*1; 
	tradex=temp8*1; 
	vehicib=temp9*1;
	fueltype=temp10*1;
	vpurindv=temp11*1;
	newid=temp12*1;
	
year=&yr4;
IF qyear=&dropqtr THEN DELETE;

KEEP newid qyear tradex netpurx qtradex mkmodel vehicyb vehicyr vehnewu vehgftc vehpurmo vehpuryr tradex vehicib fueltype vpurindv year;
RUN;

PROC MEANS; 
RUN; 

%MEND read;


* FOR SURVEY YEARS 2017 AND AFTER. NOTE CARSLIB IS D:\Documents\My SAS Libraries\cex\cars;
%MACRO read2(yr,yr4,dropqtr);

 
DATA auto&yr;
SET carslib.ovb&yr(RENAME=(qyear=temp1  
						vehicyb=temp2 
						vehicyr=temp3  
						vehnewu=temp4 
						vehgftc=temp5 
						vehpurmo=temp6
						vehpuryr=temp7 
						tradex=temp8 
						vehicib=temp9
						fueltype=temp10
						vpurindv=temp11
						make=mkmodel
						newid=temp12));

	qyear=temp1*1;  
	vehicyb=temp2*1; 
	vehicyr=temp3*1;  
	vehnewu=temp4*1; 
	vehgftc=temp5*1; 
	vehpurmo=temp6*1;
	vehpuryr=temp7*1; 
	tradex=temp8*1; 
	vehicib=temp9*1;
	fueltype=temp10*1;
	vpurindv=temp11*1;
	newid=temp12*1;
	
year=&yr4;
IF qyear=&dropqtr THEN DELETE;

KEEP newid qyear tradex netpurx qtradex mkmodel vehicyb vehicyr vehnewu vehgftc vehpurmo vehpuryr tradex vehicib fueltype vpurindv year;
RUN;

PROC MEANS; 
RUN; 

%MEND read2;

%read(11,2011,20121);
%read(12,2012,20131);
%read(13,2013,20141);
%read(14,2014,.);
%read(15,2015,20161);
%read(16,2016,20171);

%read2(17,2017,.);


******************MERGING DATA (not 72-73)*********************;
DATA carslib.master2(COMPRESS=YES);
SET 
auto8081 auto84 auto85 auto86 auto87
auto88 auto89 auto90 auto91 auto92
auto93 auto94 auto95 auto96 auto97
auto98 auto99 auto00 auto01 auto02
auto03 auto04 auto05 auto06 auto07
auto08 auto09 auto10 auto11 auto12
auto13 auto14 auto15 auto16 auto17
;

***************************************************************
YEAR VARIABLE IS NOT ALWAYS SURVEY YEAR, SO CREATE A SURVEY YEAR,
BUT KEEP YEAR WHEN CARE ABOUT WHICH DATA SOURCE YEAR THE DATA
COME FROM (I.E. 2005:5 VS. 2006:1) AND FOR LAST YEAR B/C KEEP
EXTRA QTR IN THE LAST YEAR
***************************************************************;
IF year=1985 AND qyear=19861 THEN sv_year=year+1;
   ELSE IF year=1995 AND qyear=19961 THEN sv_year=year+1;
   ELSE IF year=2004 AND qyear=20051 THEN sv_year=year+1;
   ELSE IF year=2005 AND qyear=20061 THEN sv_year=year+1;
   ELSE IF year=2014 AND qyear=20151 THEN sv_year=year+1;
   ELSE IF year=2017 AND qyear=20181 THEN sv_year=year+1;
   ELSE sv_year=year;

***************************************************************
SECTION II: Create new variables
Variables described in documentation
***************************************************************;

*******Create variable for year vehicle was made*****;
IF 1980 LE year LE 1982 THEN DO;
   IF vehicyr=1 THEN yrmade=1958;
   ELSE IF vehicyr=2 THEN yrmade=1963;
   ELSE IF vehicyr=3 THEN yrmade=1968;
   ELSE IF vehicyr=4 THEN yrmade=1972;
   ELSE IF vehicyr=5 THEN yrmade=1975; 
   ELSE IF vehicyr=6 THEN yrmade=1977; 
   ELSE IF vehicyr=7 THEN yrmade=1978;
   ELSE IF vehicyr=8 THEN yrmade=1979;
   ELSE IF vehicyr=9 THEN yrmade=1980;
   ELSE IF vehicyr=10 THEN yrmade=1981;
   ELSE IF vehicyr=11 THEN yrmade=1982;
END;
IF 1984 LE year LE 1986 THEN DO;
   IF vehicyr=1 THEN yrmade=1968;
   ELSE IF vehicyr=2 THEN yrmade=1972;
   ELSE IF vehicyr=3 THEN yrmade=1977;
   ELSE IF vehicyr=4 THEN yrmade=1980+ROUND((year-1982)/2);
   ELSE IF vehicyr=5 THEN yrmade=year-1; 
   ELSE IF vehicyr=6 THEN yrmade=year; 
   ELSE IF vehicyr=7 THEN yrmade=year+1;
END;
IF year=1987 THEN DO;
   IF vehicyr=1 THEN yrmade=1968;
   ELSE IF vehicyr=2 THEN yrmade=1972;
   ELSE IF vehicyr=3 THEN yrmade=1977;
   ELSE IF vehicyr=4 THEN yrmade=1982; 
   ELSE IF vehicyr=5 THEN yrmade=1984; 
   ELSE IF vehicyr=6 THEN yrmade=1986; 
   ELSE IF vehicyr=7 THEN yrmade=1987; 
   ELSE IF vehicyr=8 THEN yrmade=1988;
END;
IF year GT 1987 THEN DO;
   IF vehicyr=1 THEN yrmade=1968;
   ELSE IF vehicyr=2 THEN yrmade=1972;
   ELSE IF vehicyr=3 THEN yrmade=1977;
   ELSE IF vehicyr=4 THEN yrmade=1981; 
   ELSE IF vehicyr=5 THEN yrmade=1984; 
   ELSE IF vehicyr GE 6 THEN yrmade=1980+vehicyr; 
END;	
* special coding of vehicyr in 1988 only;
IF year=1988 AND vehicyr=0 THEN yrmade=1989;

***************************************************
This calcualtes how long the vehicle has been owned. The
intck function subtracts the # of months btween the two dates
Creating vehicle for how long vehicle was owned
***********************************************************;
* do 1995:5 separate becasue intvyr goes to 4-digit in 1996;
intvyr=floor(qyear/10);
intvqt=(qyear-floor(qyear/10)*10);

IF intvqt=1 then intvmo=2;
ELSE IF intvqt=2 then intvmo=5;
ELSE IF intvqt=3 then intvmo=8;
ELSE IF intvqt=4 then intvmo=11;

* Correct for missing purchase month;
IF vehpurmo=. AND vehpuryr NE . THEN vehpurmo=6;
* this affects 4 obs;
IF vehpuryr=2022 THEN vehpuryr=2002;
* this affects 1 obs;
IF vehpuryr=9996 THEN vehpuryr=1996;
* this affects 4 obs;
IF vehpuryr=9999 THEN vehpuryr=1999;
* THERE ARE 2 OBS WITH VEHPURYR=0 FOR YEAR=2000, THESE ARE 
  TREATED AS VEHPURYR=2000;
IF vehpuryr=0 THEN vehpuryr=2000;

IF YEAR LT 1996 AND qyear LT 19961 
   THEN own_for=INTCK('MONTH',MDY(vehpurmo,1,vehpuryr),
   MDY(intvmo,1,intvyr));
ELSE IF YEAR LT 1996 AND qyear=19961 
   THEN own_for=INTCK('MONTH',MDY(vehpurmo,1,vehpuryr),
   MDY(intvmo,1,intvyr));
ELSE IF YEAR GE 1996 THEN own_for=INTCK('MONTH',MDY(vehpurmo,1,vehpuryr),
  MDY(intvmo,1,intvyr));

* floor own_for at zero, less than 1% are between -1 and 13;
IF own_for NE . THEN own_for=MAX(0,own_for);

******create variable for total purchase price***;
tradex=max(tradex,0);
totpurx=netpurx+tradex;

*******Vehicle Age Variable (in years)*************************;
IF YEAR LT 1996 AND qyear LT 19961 
	THEN vehage=INTCK('MONTH',MDY(6,1,yrmade),
	MDY(intvmo,1,intvyr));
ELSE IF YEAR LT 1996 AND qyear=19961 
	THEN vehage=INTCK('MONTH',MDY(6,1,yrmade),
	MDY(intvmo,1,intvyr));
ELSE IF YEAR GE 1996 
	THEN vehage=INTCK('MONTH',MDY(6,1,yrmade),
	MDY(intvmo,1,intvyr));

IF vehage=-1 THEN vehage=0;
*********mod gives the remainder when the first term is divided by the second, we use this to convert vehage from ******************;
*********a month to a year variable*************************************************************************************************;
IF mod(vehage,12)<6 THEN vehage=vehage-mod(vehage,12);
IF mod(vehage,12)>=6 THEN vehage=vehage+(12-mod(vehage,12));

vehageyr=vehage/12;
* floor vehageyr at zero, less than 1% are = -1;
IF vehageyr NE . THEN vehageyr=MAX(0,vehageyr);

***********************************************************************************
IMPUTE MISSING VALS FOR VEH AGE, YR MADE, & OWN_FOR
***********************************************************************************;
*If car is purchased new we can get veh age directly from purchase date;
*CREATE TEST1 TO SEE HOW MANY ARE CHANGED;
IF vehageyr=. AND vehnewu=1 AND own_for NE . THEN test1=1;
else test1=0;
IF vehageyr=. AND vehnewu=1 AND own_for NE . THEN vehageyr=ROUND(own_for/12);

*CREATE TEST2 TO SEE HOW MANY ARE CHANGED;
IF yrmade=. AND vehageyr NE . THEN test2=1;
else test2=0;

IF yrmade=. AND vehageyr NE . THEN yrmade=sv_year-vehageyr;
 
*If car is purchased new and purch date is missing
 we can get own_for directly from year of car;
*CREATE TEST3 TO SEE HOW MANY ARE CHANGED;
IF own_for=. AND vehnewu=1 AND vehageyr NE . THEN test3=1;
else test3=0;
IF own_for=. AND vehnewu=1 AND vehageyr NE . THEN own_for=vehageyr*12;

*********Dummy variables for Vehicle Characteristics********;
IF aircar NE . THEN nm_aircar=1;
	ELSE nm_aircar=0;
IF autotran NE . THEN nm_autotran=1;
	Else nm_autotran=0;
IF pwrsteer NE . THEN nm_pwrsteer=1;
	ELSE nm_pwrsteer=0;
IF pwrbrake NE . THEN nm_pwrbrake=1;
	ELSE nm_pwrbrake=0;
IF sunroof NE . THEN nm_sunroof=1;
	ELSE nm_sunroof=0;
IF turbochg NE . THEN nm_turbochg=1;
	ELSE nm_turbochg=0;
IF diesel NE . THEN nm_diesel=1;
	ELSE nm_diesel=0;
IF frwhldrv NE . THEN nm_frwhldrv=1;
	ELSE nm_frwhldrv=0;
IF numdoor NE . THEN nm_numdoor=1;
	ELSE nm_numdoor=0;
IF autotran=1 THEN at=1;
	ELSE IF autotran=2 THEN at=0;
	ELSE at=.;
IF pwrbrake=1 THEN pb=1;
	ELSE IF pwrbrake=2 THEN pb=0;	
	ELSE pb=.;
IF aircar=1 THEN ac=1;
	ELSE IF aircar=2 THEN ac=0;
	ELSE ac=.;
IF pwrsteer=1 THEN ps=1;
	ELSE IF pwrsteer=2 THEN ps=0;
	ELSE ps=.;
IF diesel=1 THEN ds=1;
	ELSE IF diesel=2 THEN ds=0;
	ELSE ds=.;
IF nm_autotran=1 THEN atx=at;
	ELSE atx=0;
IF nm_pwrbrake=1 THEN pbx=pb;
	ELSE pbx=0;
IF nm_aircar=1 THEN acx=ac;
	ELSE acx=0;
IF nm_pwrsteer=1 THEN psx=ps;
	ELSE psx=0;
IF nm_diesel=1 THEN dsx=ds;
	ELSE dsx=0;
IF sunroof NE . THEN nm_sunroof=1;
	ELSE nm_sunroof=0;
IF sunroof=1 THEN sr=1;
	ELSE IF sunroof=2 THEN sr=0;
	ELSE IF year<1988 THEN sr=0;
	ELSE sr=.;
IF turbochg NE . THEN nm_turbochg=1;
	ELSE nm_turbochg=0;
IF turbochg=1 THEN tc=1;
	ELSE IF turbochg=2 THEN tc=0;
	ELSE IF year<1988 THEN tc=0;
	ELSE tc=.;
IF frwhldrv NE . THEN nm_frwhldrv=1;
	ELSE nm_frwhldrv=0;
IF frwhldrv=1 THEN fd=1;
	ELSE IF frwhldrv=2 THEN fd=0;
	ELSE IF year<1988 THEN fd=0;
	ELSE fd=.;
IF numdoor NE . THEN nm_numdoor=1;
	ELSE nm_numdoor=0;
IF nm_sunroof=1 THEN srx=sr;
	ELSE srx=0;
IF nm_turbochg=1 THEN tcx=tc;
	ELSE tcx=0;
IF nm_frwhldrv=1 THEN fdx=fd;
	ELSE fdx=0;
IF nm_numdoor=1 THEN ndx=numdoor;
	ELSE ndx=0;
IF autotype NE . THEN nm_autotype=1;
	ELSE nm_autotype=0;
IF autotype=2 THEN convertible=1;
	ELSE convertible=0;

***The codes 301, 370, and 39 are inconsistent across years, so simply dealt with directly in code*********;
	IF mkmdly=301 AND year LT 2000 THEN mkmdly=143;
* IN 1999 AND BEYOND OLDSMOBILE BRAVADA HAS TWO CODES--370 & 536, SO RECODE THE FORMER;
	IF year>1998 AND mkmdly=370 THEN mkmdly=536;
* FOR THESE YEARS NISSAN PATHFINDER IS CODED AS BOTH 39 AND 994;
IF year in(1990,1991,1992,1993,1994,1995,1996,1997) AND mkmdly=39 THEN mkmdly=994;
* FOR THESE YEARS INFINITY I30 IS CODED AS BOTH 105 AND 108;
IF year GE 2001 AND mkmdly=108 THEN mkmdly=105;
* FOR THESE YEARS DAEWOOD LANOS IS CODED AS BOTH 231 AND 301;
IF year GE 2002 AND mkmdly=301 THEN mkmdly=231;
* TEN CARS IN 2001 ARE CODED WITH 1999, WHICH HAS NO MATCH IN VEH*.TXT;
IF year=2001 AND mkmdly=1999 THEN mkmdly=9999;

* Generate a merge year variable that combined several years with the same finalcode data;
IF year in(1980,1981,1982) THEN merge_year=1980;
   ELSE IF 1990 LE year LE 1996 THEN merge_year=1997;
   ELSE merge_year=year;

******************************************************************************************* 
SOME CODES IN THE RAW DATA ARE NOT DEFINED IN CapiVehi**.txt SO CODE TO MAKE ONLY OR OTHER.
CHECK EACH YEAR TO SEE IF NEW CODES HAVE BEEN ADDED.  THIS IS UPDATED THROUGH CapiVehi05.txt
*******************************************************************************************;
   IF mkmodel='BMWAN' THEN mkmodel='BMWXX';
   IF mkmodel='BUIAN' THEN mkmodel='BUIXX';
   IF mkmodel='CADAP' THEN mkmodel='CADXX';
   IF mkmodel IN('CHEBL','CHEBM','CHEBN','CHEBP','CHEBQ','CHEVB') THEN mkmodel='CHEXX';
   IF mkmodel IN('FORBE','FORBF','FORBI') THEN mkmodel='FORXX';
   IF mkmodel='KIAAH' THEN mkmodel='KIAXX';
   IF mkmodel='MAZAP' THEN mkmodel='MAZXX';
   IF mkmodel='MECAO' THEN mkmodel='MECXX';
   IF mkmodel='MITAR' THEN mkmodel='MITXX';
   IF mkmodel='NISAT' THEN mkmodel='NISXX';
   IF mkmodel='PONAR' THEN mkmodel='PONXX';
   IF mkmodel='SAAAF' THEN mkmodel='SAAXX';
   IF mkmodel IN('SCIAB','SCIAC','SUXAL') THEN mkmodel='XXXXX';
   IF mkmodel='SUX' THEN mkmodel='XXX';
RUN;
**************************************************
Section III: Create make and model variable that 
			 is the same across years. Also create
			 variable for make.
**************************************************;
*****************************************************************************************************************
MERGES THE NUMBER AND LETTER CODE MATRICES CREATED IN EXCEL WITH THE RAW VEHICLE DATA TO ASSOCIATE
EACH OBSERVATION WITH A FINAL VEHICLE ID CODE.  
VARIABLES USED INCLUDE:
	YEAR - SURVEY YEAR
	MKMDLY - NUMBER YEAR ID CODES, USED FOR OBSERVATIONS FROM 1980-2003Q1
	MKMODEL - LETTER YEAR ID CODES, USED FOR OBSERVATIONS FROM 2003Q2-PRESENT
	MAKE - CODE FOR VEHICLES MAKE
EXTERNAL FILES USED:
	NUMBER CODE MATRIX.TXT - TAB DELIMITED TEXT FILE ASSOCIATES WITH EACH NUMERICAL YEAR ID CODE A FINAL ID CODE, 
							 COMES FROM 'CODE ARRAY.XLS' FILE
	LETTER CODE MATRIX.TXT - TAB DELIMITED TEXT FILE ASSOCIATES WITH EACH CHARACTER YEAR ID CODE A FINAL ID CODE, 
						     COMES FROM 'CODE ARRAY.XLS' FILE
	3 LETTER TO 5 LETTER.TXT - TAB DELIMITED TEXT FILE THAT IS A LIST OF ALL CHARACTER CODES THAT ARE 3 CHARACTERS
							   IN LENGTH ASSOCIATED 5 LETTER CODE, COMES FROM '3 LETTER TO 5 LETTER.XLS' FILE
*****************************************************************************************************************;

*****************************************************************************************************************
READ IN NUMBER CODE MATRIX DATA
*****************************************************************************************************************;
DATA numbercodematrix;
INFILE "D:\Documents\CEXdata\Cars\cars_data\number code matrix.txt"  delimiter='09'x;
INPUT merge_year mkmdly finalcode make;
IF mkmdly='.' THEN DELETE;
RUN;

PROC SORT DATA=numbercodematrix;
	BY merge_year mkmdly;
RUN;

*****************************************************************************************************************
ELIMIMINATE REPETATIVE CODES IN NUMBER CODE DATA.  THIS WAS DONE MANUALLY IN EXCLE, SO CODE IS NOW IRRELAVANT
*****************************************************************************************************************;
DATA numbercodematrix;
SET numbercodematrix;
count +1;
BY merge_year mkmdly;
IF first.merge_year or first.mkmdly THEN count=1;
IF count NE 1 THEN DELETE;
RUN;

*****************************************************************************************************************
READ IN CHARACTER CODE MATRIX DATA
*****************************************************************************************************************;
DATA lettercodematrix;
LENGTH mkmodel $ 5;
INFILE "D:\Documents\CEXdata\Cars\cars_data\letter code matrix.txt"  delimiter='09'x;
INPUT merge_year mkmodel $ finalcode make;
IF mkmodel='' THEN DELETE;
RUN;
PROC SORT DATA=lettercodematrix;
	BY merge_year mkmodel;
RUN;

*****************************************************************************************************************
ELIMIMINATE REPETATIVE CODES IN NUMBER CODE DATA.  THIS WAS DONE MANUALLY IN EXCLE, SO CODE IS NOW IRRELAVANT
*****************************************************************************************************************;
DATA lettercodematrix;
SET lettercodematrix;
count +1;
BY merge_year mkmodel;
IF first.merge_year or first.mkmodel THEN count=1;
IF count NE 1 THEN DELETE;
RUN;

*****************************************************************************************************************
READ IN CHARACTER CODE MATRIX DATA
*****************************************************************************************************************;
DATA lettercodematrix0617;
LENGTH mkmodel $ 5;
INFILE "D:\Documents\CEXdata\Cars\cars_data\letter_code_06_17.txt"  delimiter='09'x;
INPUT merge_year mkmodel $ finalcode make;
IF mkmodel='' THEN DELETE;
RUN;


*****************************************************************************************************************
COMBINE NUMBER AND CHARACTER CODE MATRICES INTO SINGLE MATRIX
*****************************************************************************************************************;
DATA codematrix;
SET numbercodematrix lettercodematrix lettercodematrix0617;
RUN;
PROC MEANS;
RUN;
PROC SORT DATA=codematrix;
	BY merge_year mkmdly mkmodel;
RUN;


*****************************************************************************************************************
MERGE AUTOMOBILE DATA WITH CODE MATRIX TO ASSIGN APPROPRIATE FINAL ID TO EACH OBSERVATION
*****************************************************************************************************************;
PROC SORT DATA=carslib.master2;
	BY merge_year mkmdly mkmodel;
RUN;

DATA carslib.master2;
	MERGE carslib.master2(in=IN1) codematrix(DROP=count);
	BY merge_year mkmdly mkmodel;
	IF in1;

*********create a dummy variable for "just make"***************;
IF finalcode in(1003, 1, 18, 7032, 696, 40, 47, 7061, 7062, 7063, 72, 68, 83, 935, 84, 99, 110, 115, 146, 
			  7156, 9452, 9990, 7167, 171, 688, 993, 196, 959, 7217, 7226, 227, 238, 262, 436, 295, 305, 
			  9005, 928, 557, 312, 778, 318, 329, 554, 342, 9052, 252, 349, 9357, 365, 9964, 571, 396, 
			  411, 1129, 431, 913, 153, 445, 882, 459, 466, 474, 494, 511, 520, 527, 534, 542, 297, 550, 
			  7554, 563, 574, 7585, 1004, 591, 605, 7622, 624, 641, 714, 79, 34, 9137, 7789, 9707
			  9139 9140 9141) THEN just_make=1;
ELSE just_make=0;

IF vehnewu=1 THEN vehnewu2=1;
   ELSE vehnewu2=0;
RUN;
PROC MEANS;
RUN;


***********************************************************************************
IMPUTE MISSING VALS WHEN VEHAGE & YRMADE ARE MISSING
***********************************************************************************;
PROC SORT DATA=carslib.master2;
	BY qyear vehnewu2;
RUN;
PROC MEANS NOPRINT DATA=carslib.master2;
	BY qyear vehnewu2;
	WHERE vehageyr NE . AND own_for NE .;
	VAR vehageyr own_for;
	OUTPUT OUT=mean_dat MEAN=vage_bar ownf_bar;
RUN;

DATA carslib.master2(COMPRESS=YES);
MERGE carslib.master2(in=in1) mean_dat(KEEP=qyear vehnewu2 vage_bar ownf_bar);
BY qyear vehnewu2;
IF in1;

* there are no finalcode in the 2000s so those will be the missing make-models and non-cars;
** CREATE SEPARATE GROUP FOR CARS (vehicyb=100) AND TRUCKS/VANS (vehicyb=110) AND 
   NON-CARS (vehicyb > 110);
***EDITED THIS WHEN ADDED 09 DATA, ADDING: "AND vehicyb NE ." 
   OLD CODE WOULD HAVE LEFT MKMDLY3 MISSING WHEN VEHICYB WAS MISSING
   VEHICYB IS ONLY MISSING FOR ONE OBS OVER ALL YEARS;
IF (finalcode=. OR finalcode=9999) AND vehicyb NE . THEN mkmdly3=2000+vehicyb;
   ELSE IF (finalcode=. OR finalcode=9999) AND vehicyb=. THEN mkmdly3=2000;
   ELSE mkmdly3=finalcode;

* RECODE FOR MAKE;
IF finalcode=. OR finalcode=9999 THEN make=2000+vehicyb;
   ELSE IF (finalcode=. OR finalcode=9999) AND vehicyb=. THEN make=2000+vehicyb;

** CREATE SEPARATE GROUP FOR CARS THAT ARE MAKE ONLY, COMBINIGN MAKE AND VEHICYB 
   (I.E. DISTINGUISH BETWEEN CARS (vehicyb=100) AND TRUCKS/VANS (vehicyb=110);
** NOTE, THIS MAKES A 5-DIGIT MKMODLY3, X-NEW CHANGE-X;
IF just_make=1 AND finalcode NE . AND finalcode NE 9999 AND vehicyb=100 
	THEN mkmdly3=10000+finalcode;
IF just_make=1 AND finalcode NE . AND finalcode NE 9999 AND vehicyb=110 
	THEN mkmdly3=20000+finalcode;   
 
IF vehageyr=. THEN vehageyr2=MAX(ROUND(own_for/12),ROUND(vage_bar));
   ELSE vehageyr2=vehageyr;

* CALCULATE THE FRACTION IMPUTED;
IF vehageyr2 NE vehageyr THEN imp_vehageyr=1;
   ELSE imp_vehageyr=0;

IF yrmade=. THEN yrmade2=sv_year-vehageyr2;
   ELSE yrmade2=yrmade;

* CALCULATE THE FRACTION IMPUTED;
IF yrmade NE yrmade2 THEN imp_yrmade=1;
   ELSE imp_yrmade=0;


*********create variable for make, model, and year*************;
mmy_id2=yrmade2*100000+mkmdly3;

IF own_for=. THEN own_for2=MIN(vehageyr2*12,ROUND(ownf_bar));
ELSE own_for2=own_for;

* CALCULATE THE FRACTION IMPUTED;
IF own_for NE own_for2 THEN imp_own_for=1;
   ELSE imp_own_for=0;

make_year=yrmade2*10000+make;

RUN;
PROC MEANS;
RUN;

******************************************************************
	SECTION IV: Merge Automobile Data with Demographic Data
*****************************************************************;


PROC MEANS DATA=carslib.demog8017;
RUN;
PROC SORT DATA=carslib.demog8017;
	BY newid;
RUN;
PROC SORT DATA=carslib.master2;
	BY newid;
RUN;

DATA carslib.master2(COMPRESS=YES);
MERGE carslib.master2(in=in1) carslib.demog8017(DROP=qyear);
BY newid;
IF in1;

expend2=expend*expend;
age_ref2=age_ref*age_ref;

RUN;

DATA match;
SET carslib.master2(KEEP= mmy_id2 vehageyr2 just_make mkmdly3 
							yrmade2 totpurx vehgftc own_for2 make_year);
IF totpurx GT 0 AND vehgftc NE 3 AND 0 LE own_for2 LE 12;
KEEP mmy_id2 vehageyr2 just_make mkmdly3 yrmade2 make_year;
RUN;
PROC MEANS;
RUN;

********************************************************************
create match indicator in estimation sample 
********************************************************************;

********* MATCH 1: make model year and age *************************;
PROC SORT DATA=match;
	BY mmy_id2 vehageyr2;
RUN;
DATA match1;
SET match;
BY mmy_id2 vehageyr2;
IF FIRST.mmy_id2 OR FIRST.vehageyr2;
mmyage_match=1;
IF just_make=1 THEN DELETE;
KEEP mmy_id2 vehageyr2 mmyage_match;
RUN;

********* MATCH 2: make year and age *************************;
DATA match2;
SET match;
BY mmy_id2 vehageyr2;
IF FIRST.mmy_id2 OR FIRST.vehageyr2;
myage_match=1;
IF just_make=1;
KEEP mmy_id2 vehageyr2 myage_match;
RUN;

********* MATCH 3: make model and year *************************;
DATA match3;
SET match;
BY mmy_id2;
IF FIRST.mmy_id2;
mmy_match=1;
IF just_make=1 THEN DELETE;
KEEP mmy_id2 mmy_match;
RUN;

********* MATCH 4: make and year *************************;
DATA match4;
SET match;
BY mmy_id2;
IF FIRST.mmy_id2;
my_match=1;
IF just_make=1; 
KEEP mmy_id2 my_match;
RUN;

********* MATCH 5: make model and age *************************;
PROC SORT DATA=match;
	BY mkmdly3 vehageyr2;
RUN;

DATA match5;
SET match;
BY mkmdly3 vehageyr2;
IF FIRST.mkmdly3 OR FIRST.vehageyr2;
mmage_match=1;
IF just_make=1 THEN DELETE;
KEEP mkmdly3 vehageyr2 mmage_match;
RUN;

********* MATCH 6: make and age *************************;
DATA match6;
SET match;
BY mkmdly3 vehageyr2;
IF FIRST.mkmdly3 OR FIRST.vehageyr2;
mage_match=1;
IF just_make=1;
KEEP mkmdly3 vehageyr2 mage_match;
RUN;

********* MATCH 6.5: make & model *************************;
DATA match65;
SET match;
BY mkmdly3;
IF FIRST.mkmdly3;
mm_match=1;
IF just_make=1 THEN DELETE;
KEEP mkmdly3 mm_match;
RUN;

********* MATCH 6.6: make  *************************;
DATA match66;
SET match;
BY mkmdly3;
IF FIRST.mkmdly3;
m_match=1;
IF just_make=1;
KEEP mkmdly3 m_match;
RUN;

********* MATCH 7: year and age *************************;
PROC SORT DATA=match;
	BY yrmade2 vehageyr2;
RUN;
DATA match7;
SET match;
BY yrmade2 vehageyr2;
IF FIRST.yrmade2 OR FIRST.vehageyr2;
yage_match=1;
KEEP yrmade2 vehageyr2 yage_match;
RUN;

********* MATCH 8: year *************************;
DATA match8;
SET match;
BY yrmade2;
IF FIRST.yrmade2;
y_match=1;
KEEP yrmade2 y_match;
RUN;

********* MATCH 85: year, in 2000 Group *************************;
DATA match85;
SET match;
IF 2000 LE mkmdly3 LE 2999;
RUN;
PROC SORT DATA=match85;
	BY yrmade2;
RUN;
DATA match85;
SET match85;
BY yrmade2;
IF FIRST.yrmade2;
y9999_match=1;
KEEP yrmade2 y9999_match;
RUN;

********* MATCH 9: age *************************;
PROC SORT DATA=match;
	BY vehageyr2;
RUN;
DATA match9;
SET match;
BY vehageyr2;
IF FIRST.vehageyr2;
age_match=1;
KEEP vehageyr2 age_match;
RUN;

********* MATCH 10: make_year *************************;
PROC SORT DATA=match;
	BY make_year;
RUN;
DATA match10;
SET match;
BY make_year;
IF FIRST.make_year;
make_year_match=1;
KEEP make_year make_year_match;
RUN;

****************************************************************************
MERGE MATCH VARS WITH MASTER DATA
****************************************************************************;

PROC SORT DATA=carslib.master2;
	BY mmy_id2 vehageyr2;
RUN;
DATA carslib.master2(COMPRESS=YES);
MERGE carslib.master2(IN=in1) match1 match2;
BY mmy_id2 vehageyr2;
IF in1;
RUN;
DATA carslib.master2(COMPRESS=YES);
MERGE carslib.master2(IN=in1) match3 match4;
BY mmy_id2;
IF in1;
RUN;

PROC SORT DATA=carslib.master2;
	BY mkmdly3 vehageyr2;
RUN;
DATA carslib.master2(COMPRESS=YES);
MERGE carslib.master2(IN=in1) match5 match6;
BY mkmdly3 vehageyr2;
IF in1;
RUN;
DATA carslib.master2(COMPRESS=YES);
MERGE carslib.master2(IN=in1) match65 match66;
BY mkmdly3;
IF in1;
RUN;

PROC SORT DATA=carslib.master2;
	BY yrmade2 vehageyr2;
RUN;
DATA carslib.master2(COMPRESS=YES);
MERGE carslib.master2(IN=in1) match7;
BY yrmade2 vehageyr2;
IF in1;
RUN;
DATA carslib.master2(COMPRESS=YES);
MERGE carslib.master2(IN=in1) match8 match85;
BY yrmade2;
IF in1;
RUN;

PROC SORT DATA=carslib.master2;
	BY vehageyr2;
RUN;
DATA carslib.master2(COMPRESS=YES);
MERGE carslib.master2(IN=in1) match9;
BY vehageyr2;
IF in1;
RUN;

PROC SORT DATA=carslib.master2;
	BY make_year;
RUN;
filename out_dat1 'H:\My Drive\Private\Poverty Trends\vehicles\output\freqs.txt';

DATA carslib.master2(COMPRESS=YES);
MERGE carslib.master2(IN=in1) match10;
BY make_year;
IF in1;

*ESTIMATION SAMPLE;
IF totpurx GT 0 AND vehgftc NE 3 AND 0 LE own_for2 LE 12 AND 2000 LE mkmdly3 LE 2999 THEN group=1;
   ELSE IF totpurx GT 0 AND vehgftc NE 3 AND 0 LE own_for2 LE 12 THEN group=2;
*OBSERVE TOTPURX BUT NOT IN ESTMATION SAMPLE;
   ELSE IF totpurx GT 0 AND vehgftc NE 3 THEN group=3;
*IN PREDICTION SAMPLE 1, HAVE A MATCH ON MAKE, MODEL, YEAR;
   ELSE IF mmyage_match=1 THEN group=4;
   ELSE IF myage_match=1 THEN group=5;
   ELSE IF mmy_match=1 THEN group=6;
   ELSE IF my_match=1 THEN group=7;
*IN PREDICTION SAMPLE 2, HAVE A MATCH ON MAKE BUT NOT YEAR
   SO BROADEN THE MATCH CRITERIA TO MAKE;
   ELSE IF make_year_match=1 THEN group=8;
*IN PREDICTION SAMPLE 1, OBSERVE MAKE, ONLY MATCH IS IN 9999 GROUP
 NOTE: NEED TO RECODE THESE TO BE IN 9999 GROUP;
   ELSE IF (mkmdly3 LT 2000 OR mkmdly3 GT  2999) AND y9999_match=1 THEN group=9;
*RESIDUAL;
   ELSE group=10;

vehageyr2_2=vehageyr2*vehageyr2;
vehageyr2_3=vehageyr2*vehageyr2*vehageyr2;

*PUT THOSE WITH ONLY A YRMADE MATCH BUT MK/MODEL OBSERVED IN THE 9999 GROUP;
IF group=9 THEN mmy_id2=yrmade2*10000+(2000+vehicyb);

*RECODE REGION;
IF regn1 NE . THEN nm_regn=1;
	ELSE nm_regn=0;

IF regn1=1 THEN reg1=1;
   ELSE reg1=0;
IF regn2=1 THEN reg2=1;
   ELSE reg2=0;
IF regn3=1 THEN reg3=1;
   ELSE reg3=0;
IF regn4=1 THEN reg4=1;
   ELSE reg4=0;

IF group IN(1,2) THEN lntotpurx=log(totpurx);



IF sv_year=1978 THEN cpi_u_rs05=2.748;
ELSE IF sv_year=1979 THEN cpi_u_rs05=2.51;
ELSE IF sv_year=1980 THEN cpi_u_rs05=2.257;
ELSE IF sv_year=1981 THEN cpi_u_rs05=2.063;
ELSE IF sv_year=1982 THEN cpi_u_rs05=1.945;
ELSE IF sv_year=1983 THEN cpi_u_rs05=1.865;
ELSE IF sv_year=1984 THEN cpi_u_rs05=1.791;
ELSE IF sv_year=1985 THEN cpi_u_rs05=1.732;
ELSE IF sv_year=1986 THEN cpi_u_rs05=1.702;
ELSE IF sv_year=1987 THEN cpi_u_rs05=1.645;
ELSE IF sv_year=1988 THEN cpi_u_rs05=1.588;
ELSE IF sv_year=1989 THEN cpi_u_rs05=1.521;
ELSE IF sv_year=1990 THEN cpi_u_rs05=1.45;
ELSE IF sv_year=1991 THEN cpi_u_rs05=1.399;
ELSE IF sv_year=1992 THEN cpi_u_rs05=1.365;
ELSE IF sv_year=1993 THEN cpi_u_rs05=1.331;
ELSE IF sv_year=1994 THEN cpi_u_rs05=1.304;
ELSE IF sv_year=1995 THEN cpi_u_rs05=1.273;
ELSE IF sv_year=1996 THEN cpi_u_rs05=1.24;
ELSE IF sv_year=1997 THEN cpi_u_rs05=1.214;
ELSE IF sv_year=1998 THEN cpi_u_rs05=1.198;
ELSE IF sv_year=1999 THEN cpi_u_rs05=1.173;
ELSE IF sv_year=2000 THEN cpi_u_rs05=1.134;
ELSE IF sv_year=2001 THEN cpi_u_rs05=1.103;
ELSE IF sv_year=2002 THEN cpi_u_rs05=1.086;
ELSE IF sv_year=2003 THEN cpi_u_rs05=1.062;
ELSE IF sv_year=2004 THEN cpi_u_rs05=1.034;
ELSE IF sv_year=2005 THEN cpi_u_rs05=1;
ELSE IF sv_year=2006 THEN cpi_u_rs05=0.969;
ELSE IF sv_year=2007 THEN cpi_u_rs05=0.942;
ELSE IF sv_year=2008 THEN cpi_u_rs05=0.907;
ELSE IF sv_year=2009 THEN cpi_u_rs05=0.91;
ELSE IF sv_year=2010 THEN cpi_u_rs05=0.895;
ELSE IF sv_year=2011 THEN cpi_u_rs05=0.868;
ELSE IF sv_year=2012 THEN cpi_u_rs05=0.85;
ELSE IF sv_year=2013 THEN cpi_u_rs05=0.838;
ELSE IF sv_year=2014 THEN cpi_u_rs05=0.824;
ELSE IF sv_year=2015 THEN cpi_u_rs05=0.822;
ELSE IF sv_year=2016 THEN cpi_u_rs05=0.812;
ELSE IF sv_year=2017 THEN cpi_u_rs05=0.795;
ELSE IF sv_year=2018 THEN cpi_u_rs05=0.780;  /*NOTE: this is a place holder */


* COMES FROM ~/Poverty Trends/PRICES_CARS.XLS, USES WGT'D AVG OF NEW
  AND USED CAR SERIES GIVEN THAT ABOUT 60% OF ALL CARS
  ARE PURCHASED USED;
IF sv_year=1978 THEN cars_cpi05=2.225;
ELSE IF sv_year=1979 THEN cars_cpi05=2.063;
ELSE IF sv_year=1980 THEN cars_cpi05=1.967;
ELSE IF sv_year=1981 THEN cars_cpi05=1.676;
ELSE IF sv_year=1982 THEN cars_cpi05=1.508;
ELSE IF sv_year=1983 THEN cars_cpi05=1.399;
ELSE IF sv_year=1984 THEN cars_cpi05=1.281;
ELSE IF sv_year=1985 THEN cars_cpi05=1.256;
ELSE IF sv_year=1986 THEN cars_cpi05=1.268;
ELSE IF sv_year=1987 THEN cars_cpi05=1.222;
ELSE IF sv_year=1988 THEN cars_cpi05=1.183;
ELSE IF sv_year=1989 THEN cars_cpi05=1.158;
ELSE IF sv_year=1990 THEN cars_cpi05=1.167;
ELSE IF sv_year=1991 THEN cars_cpi05=1.147;
ELSE IF sv_year=1992 THEN cars_cpi05=1.106;
ELSE IF sv_year=1993 THEN cars_cpi05=1.04;
ELSE IF sv_year=1994 THEN cars_cpi05=0.992;
ELSE IF sv_year=1995 THEN cars_cpi05=0.926;
ELSE IF sv_year=1996 THEN cars_cpi05=0.917;
ELSE IF sv_year=1997 THEN cars_cpi05=0.936;
ELSE IF sv_year=1998 THEN cars_cpi05=0.94;
ELSE IF sv_year=1999 THEN cars_cpi05=0.936;
ELSE IF sv_year=2000 THEN cars_cpi05=0.924;
ELSE IF sv_year=2001 THEN cars_cpi05=0.915;
ELSE IF sv_year=2002 THEN cars_cpi05=0.945;
ELSE IF sv_year=2003 THEN cars_cpi05=0.986;
ELSE IF sv_year=2004 THEN cars_cpi05=1.03;
ELSE IF sv_year=2005 THEN cars_cpi05=1;
ELSE IF sv_year=2006 THEN cars_cpi05=1;
ELSE IF sv_year=2007 THEN cars_cpi05=1.014;
ELSE IF sv_year=2008 THEN cars_cpi05=1.025;
ELSE IF sv_year=2009 THEN cars_cpi05=1.023;
ELSE IF sv_year=2010 THEN cars_cpi05=0.984;
ELSE IF sv_year=2011 THEN cars_cpi05=0.958;
ELSE IF sv_year=2012 THEN cars_cpi05=0.95;
ELSE IF sv_year=2013 THEN cars_cpi05=0.947; 
ELSE IF sv_year=2014 THEN cars_cpi05=0.948; 
ELSE IF sv_year=2015 THEN cars_cpi05=0.949;
ELSE IF sv_year=2016 THEN cars_cpi05=0.954;
ELSE IF sv_year=2017 THEN cars_cpi05=0.967;
ELSE IF sv_year=2018 THEN cars_cpi05=0.964;  /*NOTE: this is a place holder, not based on all of 2018*/




r_expend=expend*cpi_u_rs05;
r_totpurx=totpurx*cars_cpi05;

IF group IN(1,2) THEN ln_rtotpurx=log(r_totpurx);

IF r_expend GT 0 THEN ln_r_expend=log(r_expend);
   ELSE ln_r_expend=0;

RUN;
PROC MEANS;
RUN;
PROC FREQ data=carslib.master2;
TABLE group / out=out_1;
RUN;
PROC FREQ data=carslib.master2;
WHERE vehicyb LE 110;
TABLE group / out=out_2;
RUN;
PROC FREQ data=carslib.master2;
WHERE vehicyb GT 110;
TABLE group / out=out_3;
RUN;

DATA _NULL_;
   SET out_1 out_2 out_3;
   FILE out_dat1 lrecl=500;
   PUT group COUNT PERCENT;
RUN;

PROC SORT DATA=carslib.master2;
	BY mmy_id2;
RUN;


*CALCULATE THE MEANS FOR THE ESTIMATION SAMPLE TO COMPUTED PREDICTED VALUES;
PROC MEANS NOPRINT DATA=carslib.master2 ;
   BY mmy_id2;
   WHERE group IN(1,2);
   VAR ln_rtotpurx vehageyr2 vehageyr2_2 vehageyr2_3 nm_autotran nm_pwrbrake nm_aircar nm_pwrsteer 
		nm_diesel nm_sunroof nm_turbochg nm_frwhldrv srx tcx fdx atx psx pbx acx dsx 
		ln_r_expend ed1 ed2 ed3 fam1 fam2 fam3 fam4 fam_size age_ref age_ref2 reg1-reg3 nm_regn;
   OUTPUT OUT=mean_dat MEAN=avg1-avg35 STD=mmy_std1 mmy_std2;
RUN;

DATA carslib.master3(COMPRESS=YES);
   MERGE carslib.master2(IN=in1) 
		 mean_dat(KEEP=mmy_id2 avg1-avg35 mmy_std1 mmy_std2 _FREQ_ RENAME=(_FREQ_=mmy_count));
   BY mmy_id2;

ARRAY regvar(1:35) ln_rtotpurx vehageyr2 vehageyr2_2 vehageyr2_3 nm_autotran 
						nm_pwrbrake nm_aircar nm_pwrsteer nm_diesel nm_sunroof 
						nm_turbochg nm_frwhldrv srx tcx fdx atx psx pbx acx dsx 
						ln_r_expend ed1 ed2 ed3 fam1 fam2 fam3 fam4 fam_size age_ref 
						age_ref2 reg1-reg3 nm_regn;
ARRAY varmean(1:35) avg1-avg35;
ARRAY d_mean(1:35)	d_ln_rtotpurx d_vehageyr2 d_vehageyr2_2 d_vehageyr2_3 d_nm_autotran 
						d_nm_pwrbrake d_nm_aircar d_nm_pwrsteer d_nm_diesel d_nm_sunroof 
						d_nm_turbochg d_nm_frwhldrv d_srx d_tcx d_fdx d_atx d_psx d_pbx d_acx 
						d_dsx d_ln_r_expend d_ed1 d_ed2 d_ed3 d_fam1 d_fam2 d_fam3 d_fam4 
						d_fam_size d_age_ref d_age_ref2 d_reg1-d_reg3 d_nm_regn;

DO t=1 TO 35;
   d_mean(t)=regvar(t)-varmean(t);
END;
DROP t;

id=_N_;

IF (mkmdly3 IN (2100,2110)) OR (group=9 AND vehicyb IN (100,110)) THEN other_mm=1;
   ELSE other_mm=0;

IF group IN(1,2) THEN sample=1;
   ELSE IF group IN(3) THEN sample=2;
   ELSE IF group IN(4,5,6,7,9) THEN sample=3;
   ELSE IF group IN(8) THEN sample=4;
   ELSE IF group IN(10) THEN sample=5;

DROP dat2 autotran pwrsteer pwrbrake aircar sunroof turbochg diesel frwhldrv numdoor;
RUN;

proc compare base=carslib.master3
             compare=carslib.master3_bak
			 novalues
;
run;



**************************************************************************
*RUN THIS TO GET DESCRIPTIVE STATS FOR TABLE
**************************************************************************;

DATA temp;
SET carslib.master3(KEEP=group vehicyb mkmdly3 just_make vehageyr2 other_mm sample);

IF vehicyb IN(100,110) THEN car_truck=1;
   ELSE car_truck=0;

IF 2000 LE mkmdly3 LE 2999 OR mkmdly3=. THEN all_other=1;  * 1 obs has mkmdly3=.;
   ELSE all_other=0;

RUN;

PROC SORT DATA=temp;
	BY sample;
RUN;

PROC MEANS DATA=temp;
   BY sample;
   VAR car_truck other_mm just_make vehageyr2 all_other;
   OUTPUT OUT=tempdat1 MEAN=avg1-avg5;
RUN;
filename out_dat2 'H:\My Drive\Private\Poverty Trends\vehicles\output\means.txt';
DATA _NULL_;
   SET tempdat1;
   FILE out_dat2 lrecl=500;
   PUT avg1-avg5;
RUN;


